SNPfiltR quality filtering pipeline

Read in your unfiltered vcf file

library(vcfR)
## 
##    *****       ***   vcfR   ***       *****
##    This is vcfR 1.12.0 
##      browseVignettes('vcfR') # Documentation
##      citation('vcfR') # Citation
##    *****       *****      *****       *****
library(ggplot2)
library(gridExtra)
library(ggridges)
library(adegenet)
## Loading required package: ade4
## 
##    /// adegenet 2.1.5 is loaded ////////////
## 
##    > overview: '?adegenet'
##    > tutorials/doc/questions: 'adegenetWeb()' 
##    > bug reports/feature requests: adegenetIssues()
library(SNPfiltR)
## This is SNPfiltR v.1.0.0
## 
## Detailed usage information is available at: devonderaad.github.io/SNPfiltR/ 
## 
## If you use SNPfiltR in your published work, please cite the following papers: 
## 
## DeRaad, D.A. (2022), SNPfiltR: an R package for interactive and reproducible SNP filtering. Molecular Ecology Resources, 00, 1-15. http://doi.org/10.1111/1755-0998.13618 
## 
## Knaus, Brian J., and Niklaus J. Grunwald. 2017. VCFR: a package to manipulate and visualize variant call format data in R. Molecular Ecology Resources, 17.1:44-53. http://doi.org/10.1111/1755-0998.12549
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:gridExtra':
## 
##     combine
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
#read in vcf as vcfR
vcfR <- read.vcfR("~/Desktop/hipposideros/n3.vcf")
### check the metadata present in your vcf
vcfR
vcfR@fix[1:10,1:8]
vcfR@gt[1:10,1:2]

#read in sample metadata
samps<-read.csv("~/Desktop/hipposideros/hipposideros.sampling.csv")
#retain samples that were sequenced successfully
samps<-samps[samps$id %in% colnames(vcfR@gt),]
samps$id == colnames(vcfR@gt)[-1] #check that order matches

#generate popmap file. Two column popmap with the same format as stacks, and the columns must be named 'id' and 'pop'
popmap<-data.frame(id=samps$id,
                   pop=paste(samps$species, samps$Island, sep = "_"))
table(popmap$pop)

step 1: Implement quality filters that don’t involve missing data. This is because removing low data samples will alter percentage/quantile based missing data cutoffs, so we wait to implement those until after deciding on our final set of samples for downstream analysis

#hard filter to minimum depth of 3, and minimum genotype quality of 30
vcfR<-hard_filter(vcfR, depth = 3, gq = 30)
## 0.02% of genotypes fall below a read depth of 3 and were converted to NA
## 7.81% of genotypes fall below a genotype quality of 30 and were converted to NA

Use this function to filter for allele balance

from Puritz SNP filtering tutorial “Allele balance: a number between 0 and 1 representing the ratio of reads showing the reference allele to all reads, considering only reads from individuals called as heterozygous, we expect that the allele balance in our data (for real loci) should be close to 0.5”

#execute allele balance filter
vcfR<-filter_allele_balance(vcfR)
## 11.48% of het genotypes (0.66% of all genotypes) fall outside of 0.25 - 0.75 allele balance ratio and were converted to NA

max depth filter (super high depth loci are likely multiple loci stuck together into a single paralogous locus).

#visualize and pick appropriate max depth cutoff
max_depth(vcfR)
## cutoff is not specified, exploratory visualization will be generated.

## dashed line indicates a mean depth across all SNPs of 34.9

#filter vcf by the max depth cutoff you chose
vcfR<-max_depth(vcfR, maxdepth = 100)
## maxdepth cutoff is specified, filtered vcfR object will be returned
## 2.43% of SNPs were above a mean depth of 100 and were removed from the vcf

#check vcfR to see how many SNPs we have left
vcfR
## ***** Object of Class vcfR *****
## 33 samples
## 55362 CHROMs
## 147,323 variants
## Object size: 222.9 Mb
## 45.94 percent missing data
## *****        *****         *****

Step 2: visualize missing data by sample. Check out the visualizations and make decision on which samples to keep for downstream analysis.

#run function to visualize samples
miss<-missing_by_sample(vcfR=vcfR)
## No popmap provided

#run function to drop samples above the threshold we want from the vcf
#setting it conservatively for now
vcfR<-missing_by_sample(vcfR=vcfR, cutoff = .9)
## 1 samples are above a 0.9 missing data cutoff, and were removed from VCF

#subset popmap to only include retained individuals
popmap<-popmap[popmap$id %in% colnames(vcfR@gt),]

#alternatively, you can drop individuals from vcfR manually using the following syntax, if a strict cutoff doesn't work for your dataset
#vcfR@gt <- vcfR@gt[,colnames(vcfR@gt) != "KVO248_H_dinops_Isabel"]
#drop invariant sites plus singletons for plotting
vcf.filt<-min_mac(vcfR, min.mac = 2)
## 55.97% of SNPs fell below a minor allele count of 2 and were removed from the VCF

#assess the effect of missing data on clustering inferences
assess_missing_data_tsne(vcf.filt, popmap = popmap, thresholds =c(.7,.8,.9, 1), clustering = FALSE)
## cutoff is specified, filtered vcfR object will be returned
## 47.19% of SNPs fell below a completeness cutoff of 0.7 and were removed from the VCF

## cutoff is specified, filtered vcfR object will be returned

## 59.93% of SNPs fell below a completeness cutoff of 0.8 and were removed from the VCF

## cutoff is specified, filtered vcfR object will be returned

## 82.68% of SNPs fell below a completeness cutoff of 0.9 and were removed from the VCF

## cutoff is specified, filtered vcfR object will be returned

## 99.46% of SNPs fell below a completeness cutoff of 1 and were removed from the VCF

## [[1]]
##             V1           V2                    pop     missing
## 1   -16.744456  38.05600626        diadema_Gatokae 0.194534466
## 2    18.429990  30.35880386     dinops_Guadalcanal 0.743919886
## 3    48.847937  78.03315228    diadema_Guadalcanal 0.008233336
## 4    57.445403  79.91380439    diadema_Guadalcanal 0.018072465
## 5    71.605100 -83.62683122     dinops_Guadalcanal 0.017605325
## 6    79.694850 -79.16750042     dinops_Guadalcanal 0.158243555
## 7    73.788514 -72.39200762     dinops_Guadalcanal 0.053399901
## 8    58.840082  71.21247151    diadema_Guadalcanal 0.009167616
## 9    50.093631  69.19411417    diadema_Guadalcanal 0.058947184
## 10   -6.287859  22.56222630         diadema_Isabel 0.395229336
## 11  -16.119427  21.57419760         diadema_Isabel 0.137660214
## 12  -23.032231  26.49747140         diadema_Isabel 0.043589968
## 13   -8.776985  30.13336524         diadema_Isabel 0.383434060
## 14    4.358041  26.74448750         diadema_Isabel 0.760999679
## 15   70.310253  -8.64102199         dinops_Rendova 0.016437476
## 16   70.682287   0.26588171         dinops_Rendova 0.010773408
## 17   22.874371  18.13162684         dinops_Rendova 0.676885346
## 18   79.592881   0.02234689         dinops_Rendova 0.006977898
## 19   79.233443  -8.88428540         dinops_Rendova 0.007153076
## 20  -19.066359 -14.71036261        diadema_Rendova 0.017021401
## 21  -17.760806 -21.28675901        demissus_Makira 0.054684535
## 22   25.227486  34.32396583     dinops_Guadalcanal 0.618492891
## 23   66.118207 -76.74591245     dinops_Guadalcanal 0.032728971
## 24 -120.786111 -41.52728831 diadema_Ngella Islands 0.008817261
## 25 -138.359281 -36.44430848 diadema_Ngella Islands 0.025196345
## 26 -132.019279 -27.04641869 diadema_Ngella Islands 0.010860997
## 27 -121.148636 -30.21588551 diadema_Ngella Islands 0.014539721
## 28 -131.453467 -45.40259541 diadema_Ngella Islands 0.009576363
## 29 -128.739874 -36.12114582 diadema_Ngella Islands 0.014422937
## 30   -6.882223 -17.96834825    diadema_New Britain 0.033750839
## 31    4.239172  38.66812842            diadema_PNG 0.117252051
## 32    5.795346  14.48862103            diadema_PNG 0.090917054
## 
## [[2]]
##             V1         V2                    pop     missing
## 1    52.923162  -25.77476        diadema_Gatokae 0.120503251
## 2    10.094502  -60.07449     dinops_Guadalcanal 0.686583817
## 3   -58.042845  -84.63555    diadema_Guadalcanal 0.004193759
## 4   -74.006172  -82.73297    diadema_Guadalcanal 0.006579200
## 5   -38.721031  190.74533     dinops_Guadalcanal 0.015120619
## 6   -50.215902  202.40456     dinops_Guadalcanal 0.083605864
## 7   -26.457070  201.39452     dinops_Guadalcanal 0.017236736
## 8   -75.802445  -98.77197    diadema_Guadalcanal 0.004847832
## 9   -59.529833 -100.62589    diadema_Guadalcanal 0.024470009
## 10   50.818225  -46.60589         diadema_Isabel 0.292062637
## 11   70.020190  -56.05669         diadema_Isabel 0.068485245
## 12   71.433186  -40.44202         diadema_Isabel 0.016582663
## 13   54.069387  -61.54989         diadema_Isabel 0.283867493
## 14   34.366005  -59.85504         diadema_Isabel 0.710322804
## 15  148.853156  -92.91231         dinops_Rendova 0.007194798
## 16  161.684605 -102.88627         dinops_Rendova 0.005001731
## 17   21.460383  -81.65200         dinops_Rendova 0.646800816
## 18  171.621896  -89.98867         dinops_Rendova 0.003539687
## 19  158.775434  -80.01458         dinops_Rendova 0.003308838
## 20   29.739722   16.51303        diadema_Rendova 0.010118887
## 21   26.359527   28.31122        demissus_Makira 0.023277288
## 22   -2.432004  -54.97074     dinops_Guadalcanal 0.541264284
## 23  -37.691403  213.10623     dinops_Guadalcanal 0.012850602
## 24 -147.817391   45.60362 diadema_Ngella Islands 0.003962910
## 25 -152.405896   62.33438 diadema_Ngella Islands 0.009003116
## 26 -132.583462   55.06326 diadema_Ngella Islands 0.005117156
## 27 -154.888937   29.17433 diadema_Ngella Islands 0.005309530
## 28 -134.937920   33.97661 diadema_Ngella Islands 0.006617675
## 29 -165.304868   46.54382 diadema_Ngella Islands 0.005232581
## 30   45.702013   35.01273    diadema_New Britain 0.018814205
## 31  105.330469   25.03939            diadema_PNG 0.062136894
## 32   97.585316   34.32671            diadema_PNG 0.045669655
## 
## [[3]]
##              V1          V2                    pop      missing
## 1  -84.78334522    4.612278        diadema_Gatokae 0.0326686844
## 2   -0.04739858   40.700966     dinops_Guadalcanal 0.4955492256
## 3   49.43361929   35.994807    diadema_Guadalcanal 0.0004450774
## 4   31.77680174   36.150176    diadema_Guadalcanal 0.0008011394
## 5   23.71630897  127.674983     dinops_Guadalcanal 0.0102367812
## 6   24.78776058  146.619844     dinops_Guadalcanal 0.0141534627
## 7   14.98025419  137.851107     dinops_Guadalcanal 0.0037386505
## 8   40.81415890   46.631798    diadema_Guadalcanal 0.0027594801
## 9   40.35902764   25.935140    diadema_Guadalcanal 0.0056079758
## 10 -55.12534500   -7.485090         diadema_Isabel 0.1104682215
## 11 -60.09763668  -20.060000         diadema_Isabel 0.0144205092
## 12 -72.99821752  -17.015104         diadema_Isabel 0.0032935731
## 13 -67.47681654   -4.101519         diadema_Isabel 0.1219512195
## 14 -55.04415941   10.436555         diadema_Isabel 0.5354281645
## 15 -74.42552695 -102.535283         dinops_Rendova 0.0009791704
## 16 -68.06060045 -114.266561         dinops_Rendova 0.0010681859
## 17 -24.34461207   30.244245         dinops_Rendova 0.5198504540
## 18 -86.14843617 -109.039505         dinops_Rendova 0.0011572014
## 19 -79.73562302 -120.773303         dinops_Rendova 0.0007121239
## 20 -12.40629570  -23.569686        diadema_Rendova 0.0048068364
## 21  -4.21590287  -17.760865        demissus_Makira 0.0089905644
## 22   8.40246607   49.030204     dinops_Guadalcanal 0.3154708919
## 23  34.01477791  137.071454     dinops_Guadalcanal 0.0016022788
## 24  83.36004549  -47.430174 diadema_Ngella Islands 0.0008011394
## 25  78.49711589  -13.882633 diadema_Ngella Islands 0.0022253872
## 26  95.18219781  -39.376159 diadema_Ngella Islands 0.0011572014
## 27  91.27376272  -10.974047 diadema_Ngella Islands 0.0016912943
## 28  78.70366989  -34.142018 diadema_Ngella Islands 0.0030265266
## 29  90.12527268  -24.214715 diadema_Ngella Islands 0.0010681859
## 30 -55.91882762   26.336683    diadema_New Britain 0.0097026883
## 31   3.92000199  -71.169603            diadema_PNG 0.0210076553
## 32  11.48150201  -77.493976            diadema_PNG 0.0155777105
## 
## [[4]]
##            V1          V2                    pop missing
## 1   42.868879   -2.552009        diadema_Gatokae       0
## 2   50.004070  -47.005091     dinops_Guadalcanal       0
## 3  -56.447556   63.749656    diadema_Guadalcanal       0
## 4  -71.238425   65.742178    diadema_Guadalcanal       0
## 5  -17.081721 -114.777664     dinops_Guadalcanal       0
## 6  -32.709819 -126.158157     dinops_Guadalcanal       0
## 7   13.741045  -97.804526     dinops_Guadalcanal       0
## 8   21.425413   67.600290    diadema_Guadalcanal       0
## 9  -70.049316   50.288752    diadema_Guadalcanal       0
## 10  32.705080  -50.596141         diadema_Isabel       0
## 11  48.898130  -75.104117         diadema_Isabel       0
## 12 280.718725  104.365502         diadema_Isabel       0
## 13  29.795939  -90.804467         diadema_Isabel       0
## 14 -29.693643 -111.982788         diadema_Isabel       0
## 15  59.358252  -61.364167         dinops_Rendova       0
## 16  44.164650  -60.958850         dinops_Rendova       0
## 17 -64.799692  135.149860         dinops_Rendova       0
## 18  44.475828  -20.933483         dinops_Rendova       0
## 19  -7.091263   28.014397         dinops_Rendova       0
## 20  44.167834   13.620316        diadema_Rendova       0
## 21 -48.830222 -114.846161        demissus_Makira       0
## 22  31.024222  -67.460580     dinops_Guadalcanal       0
## 23 -45.464135 -132.189293     dinops_Guadalcanal       0
## 24  -7.406134   71.752369 diadema_Ngella Islands       0
## 25   6.191084   36.949818 diadema_Ngella Islands       0
## 26 -20.380579   35.598758 diadema_Ngella Islands       0
## 27   4.859013   59.698323 diadema_Ngella Islands       0
## 28  -6.291670   44.305438 diadema_Ngella Islands       0
## 29 -19.460552   50.683255 diadema_Ngella Islands       0
## 30 -97.673627  107.587413    diadema_New Britain       0
## 31 -75.773513  123.321177            diadema_PNG       0
## 32 -84.006299  116.109990            diadema_PNG       0
#do we need to drop samples more aggressively?
#which filtering threshold adequately removes missing data to allow objective clustering?
#Samples at the high end of missing data cutoffs are not clustering correctly and dragging this whole enterprise down
#let's set a better cutoff
vcfR<-missing_by_sample(vcfR=vcfR, cutoff = .75)
## 4 samples are above a 0.75 missing data cutoff, and were removed from VCF

Step 3: Set the arbitrary missing data cutoff

We can visualize the effect that typical missing data cutoffs will have on both the number of SNPs retained and the total missing data in our entire dataset.

We want to choose a cutoff that minimizes the overall missing data in the dataset, while maximizing the total number of loci retained.

Clustering analyses above show that .7, .8, and .9 all seem to allow samples to cluster reasonably

#visualize missing data by SNP and the effect of various cutoffs on the missingness of each sample
missing_by_snp(vcfR)
## cutoff is not specified, exploratory visualizations will be generated
## Picking joint bandwidth of 0.0675

##    filt missingness snps.retained
## 1  0.30  0.18533212        105695
## 2  0.50  0.13838845         95162
## 3  0.60  0.11434738         88605
## 4  0.65  0.09821311         83421
## 5  0.70  0.08990074         80426
## 6  0.75  0.08110860         76970
## 7  0.80  0.06297955         68647
## 8  0.85  0.05344274         63415
## 9  0.90  0.03132406         48045
## 10 0.95  0.01745598         35700
## 11 1.00  0.00000000         18251
#choose a value that retains an acceptable amount of missing data in each sample, and maximizes SNPs retained while minimizing overall missing data, and filter vcf
vcfR<-missing_by_snp(vcfR, cutoff = .9)
## cutoff is specified, filtered vcfR object will be returned
## 67.39% of SNPs fell below a completeness cutoff of 0.9 and were removed from the VCF

check once more to see how many SNPs and individuals remain compared to our original, unfiltered vcf

#use this function to remove loci that have become invariant during our filtering procedure plus singletons which don't contribute to phylogenetic signal
vcfR<-min_mac(vcfR, min.mac = 2)
## 64.14% of SNPs fell below a minor allele count of 2 and were removed from the VCF

#check the number of loci retained
vcfR
## ***** Object of Class vcfR *****
## 28 samples
## 10598 CHROMs
## 17,228 variants
## Object size: 43.3 Mb
## 3.27 percent missing data
## *****        *****         *****
#plot depth per snp and per sample
dp <- extract.gt(vcfR, element = "DP", as.numeric=TRUE)
heatmap.bp(dp, rlabels = FALSE)

#plot genotype quality per snp and per sample
gq <- extract.gt(vcfR, element = "GQ", as.numeric=TRUE)
heatmap.bp(gq, rlabels = FALSE)

We can filter for linkage (one SNP per locus) and use the convenient function ‘write.vcf’ from vcfR to export our filtered vcf file for downstream analyses

#write out the filtered vcf file
#write.vcf(vcfR, file = "~/Desktop/hipposideros/filtered.vcf.gz")
#filter to one SNP per locus to get an unlinked dataset
unlinked.vcf<-distance_thin(vcfR, min.distance = 10000)
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |                                                                      |   1%
  |                                                                            
  |=                                                                     |   1%
  |                                                                            
  |=                                                                     |   2%
  |                                                                            
  |==                                                                    |   2%
  |                                                                            
  |==                                                                    |   3%
  |                                                                            
  |==                                                                    |   4%
  |                                                                            
  |===                                                                   |   4%
  |                                                                            
  |===                                                                   |   5%
  |                                                                            
  |====                                                                  |   5%
  |                                                                            
  |====                                                                  |   6%
  |                                                                            
  |=====                                                                 |   6%
  |                                                                            
  |=====                                                                 |   7%
  |                                                                            
  |=====                                                                 |   8%
  |                                                                            
  |======                                                                |   8%
  |                                                                            
  |======                                                                |   9%
  |                                                                            
  |=======                                                               |   9%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |=======                                                               |  11%
  |                                                                            
  |========                                                              |  11%
  |                                                                            
  |========                                                              |  12%
  |                                                                            
  |=========                                                             |  12%
  |                                                                            
  |=========                                                             |  13%
  |                                                                            
  |=========                                                             |  14%
  |                                                                            
  |==========                                                            |  14%
  |                                                                            
  |==========                                                            |  15%
  |                                                                            
  |===========                                                           |  15%
  |                                                                            
  |===========                                                           |  16%
  |                                                                            
  |============                                                          |  16%
  |                                                                            
  |============                                                          |  17%
  |                                                                            
  |============                                                          |  18%
  |                                                                            
  |=============                                                         |  18%
  |                                                                            
  |=============                                                         |  19%
  |                                                                            
  |==============                                                        |  19%
  |                                                                            
  |==============                                                        |  20%
  |                                                                            
  |==============                                                        |  21%
  |                                                                            
  |===============                                                       |  21%
  |                                                                            
  |===============                                                       |  22%
  |                                                                            
  |================                                                      |  22%
  |                                                                            
  |================                                                      |  23%
  |                                                                            
  |================                                                      |  24%
  |                                                                            
  |=================                                                     |  24%
  |                                                                            
  |=================                                                     |  25%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |==================                                                    |  26%
  |                                                                            
  |===================                                                   |  26%
  |                                                                            
  |===================                                                   |  27%
  |                                                                            
  |===================                                                   |  28%
  |                                                                            
  |====================                                                  |  28%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |=====================                                                 |  29%
  |                                                                            
  |=====================                                                 |  30%
  |                                                                            
  |=====================                                                 |  31%
  |                                                                            
  |======================                                                |  31%
  |                                                                            
  |======================                                                |  32%
  |                                                                            
  |=======================                                               |  32%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |=======================                                               |  34%
  |                                                                            
  |========================                                              |  34%
  |                                                                            
  |========================                                              |  35%
  |                                                                            
  |=========================                                             |  35%
  |                                                                            
  |=========================                                             |  36%
  |                                                                            
  |==========================                                            |  36%
  |                                                                            
  |==========================                                            |  37%
  |                                                                            
  |==========================                                            |  38%
  |                                                                            
  |===========================                                           |  38%
  |                                                                            
  |===========================                                           |  39%
  |                                                                            
  |============================                                          |  39%
  |                                                                            
  |============================                                          |  40%
  |                                                                            
  |============================                                          |  41%
  |                                                                            
  |=============================                                         |  41%
  |                                                                            
  |=============================                                         |  42%
  |                                                                            
  |==============================                                        |  42%
  |                                                                            
  |==============================                                        |  43%
  |                                                                            
  |==============================                                        |  44%
  |                                                                            
  |===============================                                       |  44%
  |                                                                            
  |===============================                                       |  45%
  |                                                                            
  |================================                                      |  45%
  |                                                                            
  |================================                                      |  46%
  |                                                                            
  |=================================                                     |  46%
  |                                                                            
  |=================================                                     |  47%
  |                                                                            
  |=================================                                     |  48%
  |                                                                            
  |==================================                                    |  48%
  |                                                                            
  |==================================                                    |  49%
  |                                                                            
  |===================================                                   |  49%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |===================================                                   |  51%
  |                                                                            
  |====================================                                  |  51%
  |                                                                            
  |====================================                                  |  52%
  |                                                                            
  |=====================================                                 |  52%
  |                                                                            
  |=====================================                                 |  53%
  |                                                                            
  |=====================================                                 |  54%
  |                                                                            
  |======================================                                |  54%
  |                                                                            
  |======================================                                |  55%
  |                                                                            
  |=======================================                               |  55%
  |                                                                            
  |=======================================                               |  56%
  |                                                                            
  |========================================                              |  56%
  |                                                                            
  |========================================                              |  57%
  |                                                                            
  |========================================                              |  58%
  |                                                                            
  |=========================================                             |  58%
  |                                                                            
  |=========================================                             |  59%
  |                                                                            
  |==========================================                            |  59%
  |                                                                            
  |==========================================                            |  60%
  |                                                                            
  |==========================================                            |  61%
  |                                                                            
  |===========================================                           |  61%
  |                                                                            
  |===========================================                           |  62%
  |                                                                            
  |============================================                          |  62%
  |                                                                            
  |============================================                          |  63%
  |                                                                            
  |============================================                          |  64%
  |                                                                            
  |=============================================                         |  64%
  |                                                                            
  |=============================================                         |  65%
  |                                                                            
  |==============================================                        |  65%
  |                                                                            
  |==============================================                        |  66%
  |                                                                            
  |===============================================                       |  66%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |===============================================                       |  68%
  |                                                                            
  |================================================                      |  68%
  |                                                                            
  |================================================                      |  69%
  |                                                                            
  |=================================================                     |  69%
  |                                                                            
  |=================================================                     |  70%
  |                                                                            
  |=================================================                     |  71%
  |                                                                            
  |==================================================                    |  71%
  |                                                                            
  |==================================================                    |  72%
  |                                                                            
  |===================================================                   |  72%
  |                                                                            
  |===================================================                   |  73%
  |                                                                            
  |===================================================                   |  74%
  |                                                                            
  |====================================================                  |  74%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |=====================================================                 |  75%
  |                                                                            
  |=====================================================                 |  76%
  |                                                                            
  |======================================================                |  76%
  |                                                                            
  |======================================================                |  77%
  |                                                                            
  |======================================================                |  78%
  |                                                                            
  |=======================================================               |  78%
  |                                                                            
  |=======================================================               |  79%
  |                                                                            
  |========================================================              |  79%
  |                                                                            
  |========================================================              |  80%
  |                                                                            
  |========================================================              |  81%
  |                                                                            
  |=========================================================             |  81%
  |                                                                            
  |=========================================================             |  82%
  |                                                                            
  |==========================================================            |  82%
  |                                                                            
  |==========================================================            |  83%
  |                                                                            
  |==========================================================            |  84%
  |                                                                            
  |===========================================================           |  84%
  |                                                                            
  |===========================================================           |  85%
  |                                                                            
  |============================================================          |  85%
  |                                                                            
  |============================================================          |  86%
  |                                                                            
  |=============================================================         |  86%
  |                                                                            
  |=============================================================         |  87%
  |                                                                            
  |=============================================================         |  88%
  |                                                                            
  |==============================================================        |  88%
  |                                                                            
  |==============================================================        |  89%
  |                                                                            
  |===============================================================       |  89%
  |                                                                            
  |===============================================================       |  90%
  |                                                                            
  |===============================================================       |  91%
  |                                                                            
  |================================================================      |  91%
  |                                                                            
  |================================================================      |  92%
  |                                                                            
  |=================================================================     |  92%
  |                                                                            
  |=================================================================     |  93%
  |                                                                            
  |=================================================================     |  94%
  |                                                                            
  |==================================================================    |  94%
  |                                                                            
  |==================================================================    |  95%
  |                                                                            
  |===================================================================   |  95%
  |                                                                            
  |===================================================================   |  96%
  |                                                                            
  |====================================================================  |  96%
  |                                                                            
  |====================================================================  |  97%
  |                                                                            
  |====================================================================  |  98%
  |                                                                            
  |===================================================================== |  98%
  |                                                                            
  |===================================================================== |  99%
  |                                                                            
  |======================================================================|  99%
  |                                                                            
  |======================================================================| 100%
## 10598 out of 17228 input SNPs were not located within 10000 base-pairs of another SNP and were retained despite filtering
#write out vcf file of unlinked SNPs
#write.vcf(unlinked.vcf, file = "~/Desktop/hipposideros/unlinked.filtered.vcf.gz")